/****************************************
This program reads in all of the self-employment data
(2002 - 2016).

Output is OUTPUTS.seearnings_wide, which is used in 03.sample_restrictions.sas

******************************************/



/******************* Reads in the 2002 - 2006 self-employment data ************/
%macro readin_0206;

    /*------------------------------------------------------------------
      se(yr): builds WORK.nese_<yr> from indir.ssn<yr>units_<suffix>.

      Each input row yields one output row for pik_filer and, when
      flag_uncertain is set, a second row for pik_spouse.  Output
      columns: pik earn in_se year flag_uncertain.

      The indir libref is not assigned inside this macro; presumably it
      is assigned earlier in the session -- confirm.
    ------------------------------------------------------------------*/
    %macro se(yr) ;

        /* Dataset-name suffix: only 2004 deviates from plain "final". */
        %if "&yr." ne "2004" %then %let final = final ;
        %else %let final = 2004final_v02 ;

        proc contents data=indir.ssn&yr.units_&final ;
            title 'ssn units' ;
        run ;

        data nese_&yr (keep= pik earn in_se year flag_uncertain ) ;
            /* 2005 and 2006 carry ssn_* names, which are renamed to the
               pik_* names that the other years already use.            */
            set indir.ssn&yr.units_&final
                    (in=in_units
                     keep=ssnunit_id naics_ssn_code naics_ssn_flg ssn_owner_src
                          earn_non_farm_se1 earn_non_farm_se2
                          pay_wage_ssn_captr einunit_id_1040 einunit_id_bmf
                          einunit_id_namtch c_se_indicator emp_stat
                          exps_tot_ssn actv_stat ein
                          %if "&yr." = "2005" or "&yr." = "2006" %then %do ;
                              ssn_spouse ssn_owner ssn_filer
                              rename = (ssn_spouse = pik_spouse
                                        ssn_owner  = pik_owner
                                        ssn_filer  = pik_filer)
                          %end ;
                          %else %do ;
                              pik_spouse pik_owner pik_filer
                          %end ;
                     where=(ssnunit_id ne . )) ;

            year = &yr ;

            /* Set when the spouse PIK differs from the comparison value
               below; such rows are written for both filer and spouse.  */
            flag_uncertain = (pik_spouse ne [invalid pik] ) ;

            /* First row: the filer. */
            pik = pik_filer ;

            /* Flagged rows get the mean of the two earnings fields,
               all other rows get the first field only.               */
            if not flag_uncertain then earn = earn_non_farm_se1 ;
            else earn = (earn_non_farm_se1+earn_non_farm_se2)/2 ;

            in_se = (earn>0) ;
            output ;

            /* Second row: the spouse, with the same earn and in_se. */
            if flag_uncertain then do ;
                pik = pik_spouse ;
                output ;
            end ;
        run ;

    %mend se ;

    /* Build one work dataset per year. */
    %do yr = 2002 %to 2006 ;
        %se(&yr.) ;
    %end ;

    /* Stack the yearly datasets into a single permanent dataset. */
    data OUTPUTS.non_emp_2002_2006 ;
        set
            %do yr = 2002 %to 2006 ;
                nese_&yr.
            %end ;
        ;
    run ;

    /* Sort in place by person and year. */
    proc sort data=OUTPUTS.non_emp_2002_2006 ;
        by pik year ;
    run ;

%mend readin_0206 ;

%readin_0206;


%macro readin; 
/**********************************
    Builds one work dataset (nese_<yr>) per year from &mergeyr. to
    &endyr., interleaves them with OUTPUTS.non_emp_2002_2006, and
    writes the wide one-row-per-pik file OUTPUTS.seearnings_wide with
    se_earn<yr> / self_emp<yr> columns for &startyr. - &endyr.
***********************************/
%let startyr = 2002 ; 
%let endyr = 2016 ;                 

/* First year handled by se_readin; earlier years come from
   OUTPUTS.non_emp_2002_2006.  Kept in one place so the build loop and
   the stacking loop below cannot drift apart.                        */
%local mergeyr ;
%let mergeyr = 2007 ;
                
                
/*------------------------------------------------------------------
  se_readin(yr): merges indir.ssn<yr>units_<post> with
  indir.non<yr>emp_final by ssnunit_id and writes WORK.nese_<yr>.
  Re-points the indir libref at the directory for <yr>.
------------------------------------------------------------------*/
%macro se_readin(yr) ;
    
    
libname indir "/data/economic/br/&yr." ; 
    
/* Suffix of the ssn-units dataset name.
   NOTE(review): the "2007" test in the second branch can never be
   reached, because 2007 is already caught by the "< 2008" branch and
   gets &yr.final.  Confirm which suffix 2007 is meant to use.       */
%if &yr. < 2008 %then %let post = &yr.final ; 
%else %if %eval("&yr." = "2007" or "&yr." = "2008" or "&yr." = "2011") %then %let post = updated ;
%else %let post = final ;          
                
proc contents data=indir.non&yr.emp_final;
                title "nonemp &yr." ;
run;
                
proc contents data=indir.ssn&yr.units_&post. ;
                title "ssn units &yr." ;
run;                
                
/* Sort the nonemployer file on the merge key.  nonemp_id is renamed to
   ssnunit_id, and the where= clause refers to the renamed variable.   */
proc sort data=indir.non&yr.emp_final (keep=nonemp_id lgl_form_code rcpt einunit_id_bmf einunit_id_1040c 
                                        geo_cbsa_mail geo_state_fips_mail geo_county_fips_mail geo_zcta_mail 
                                        name1 name2 rcpt_orig 
                                        sch_1040 naics_code naics_edit_flg tab_flg 
                                        rename=(nonemp_id=ssnunit_id
                                                einunit_id_bmf=einunit_id_bmf_ne
                                                einunit_id_1040c=einunit_id_1040_ne)
                                       where=(ssnunit_id ne . )) 
    out=ne&yr;
    by ssnunit_id;
run;

/* NOTE(review): the ssn-units dataset is merged without being sorted
   here; presumably it is already ordered by ssnunit_id -- confirm.   */
data nese_&yr;
    merge indir.ssn&yr.units_&post. (in=a 
                                        keep=ssnunit_id naics_ssn_code naics_ssn_flg ssn_owner_src 
                                            earn_non_farm_se1  earn_non_farm_se2 naics_ssn_code 
                                            pay_wage_ssn_captr einunit_id_1040 einunit_id_bmf einunit_id_namtch
                                            c_se_indicator emp_stat exps_tot_ssn pik_spouse pik_owner pik_filer
                                            actv_stat ein
                                        rename=(einunit_id_bmf=einunit_id_bmf_ss 
                                                einunit_id_1040=einunit_id_1040_ss
                                                pik_owner=pik) where=(ssnunit_id ne . ))
            ne&yr. (in=b);
            by ssnunit_id;
            
            /* Persist the in= indicators as ordinary variables. */
            ina=0;
            inb=0;
            if a then ina=1;
            if b then inb=1;
            year=&yr;

            /* Earnings of the owner: first field when the owner is the
               filer, second field when the owner is the spouse.  If both
               match, the spouse assignment wins; if neither matches,
               earn stays missing.                                       */
            if pik=pik_filer then earn=earn_non_farm_se1;
            if pik=pik_spouse then earn=earn_non_farm_se2;                                                   

            /* emp = 1 when pay_wage_ssn_captr is positive. */
            emp=0;
            if pay_wage_ssn_captr > 0 then emp=1;

            /* low_rcpt = 1 when rcpt_orig is strictly between 0 and 1000. */
            low_rcpt=0;
            if rcpt_orig > 0 and rcpt_orig < 1000 then low_rcpt=1;

            /* se_miss = 1 when both earnings fields are zero or missing.
               (Previously this branch assigned 0, so the flag never
               fired.)  se_miss is not kept in se_long below.            */
            se_miss=0;
            if earn_non_farm_se1 in (0,.) and earn_non_farm_se2 in (0,.) then se_miss=1;
            
            /* NOTE(review): the type of pik is not visible here; put()
               with a numeric format followed by a comparison with a
               blank suggests a character pik is intended -- confirm.
               NOTE(review): [invalid pik] looks like a placeholder for
               a literal that is not shown in this file -- confirm.     */
            owner_found=0;           
            pik = put(pik,9.) ;
            if pik ne ' ' and pik ne [invalid pik]  then owner_found=1;

            /* Subsetting IF: keep rows found in ne&yr. or with emp=1. */
            if inb=1 or emp=1;
run;

%mend se_readin ;
           
/* Build and sort one work dataset per year. */
%do yy = &mergeyr. %to &endyr. ; 
            %se_readin(&yy.) ;
            
            proc sort data=nese_&yy. ; 
                by pik year; 
            run;
%end ; 
            
/* Interleave all years by pik and year.  Defined as a view, so the rows
   are read when the next step runs.  Every input must already be sorted
   by pik year (the yearly datasets are sorted in the loop above).      */
data se_long /view=se_long ;
       set 
            %do yy = &mergeyr. %to &endyr. ; 
            nese_&yy.  (keep=pik low_rcpt earn year) 
            %end;
            OUTPUTS.non_emp_2002_2006 ; 
       by pik year ; 
run;  


                
/* Long-to-wide: one output row per pik with one earnings column and one
   0/1 indicator column per year.  The keep= list is driven by
   &startyr./&endyr. so it always matches the arrays below.            */
data OUTPUTS.seearnings_wide (keep=pik se_earn&startyr.-se_earn&endyr. self_emp&startyr.-self_emp&endyr.) ; 
       set se_long ; 
       by pik year ; 
       retain se_earn&startyr.-se_earn&endyr. self_emp&startyr.-self_emp&endyr.;         
       /* Arrays are indexed directly by calendar year; a year outside
          &startyr. - &endyr. would be an out-of-range subscript.      */
       array se_earn{&startyr:&endyr.} se_earn&startyr.-se_earn&endyr.; 
       array s_emp{&startyr.:&endyr.} self_emp&startyr.-self_emp&endyr.;
          
       /* Reset every year slot at the start of each person. */
       if first.pik then do ; 
            do ii = &startyr. to &endyr. ; 
                se_earn{ii} = 0 ; 
                s_emp{ii} = 0 ; 
            end;
       end;
             
       /* NOTE(review): only the last row of each pik-year contributes,
          so several rows in the same pik-year are not summed, and a
          missing earn makes the slot missing rather than 0.  Confirm
          that this is the intended rule.                               */
       if last.year then se_earn{year} = earn + se_earn{year} ;        
       /* making this a wide file */
       if last.pik then do ; 
            do ii = &startyr. to &endyr. ;
                /* Scale by 1000, then flag years with positive earnings. */
                se_earn{ii}= se_earn{ii}*1000; 
                if se_earn{ii}>0 then s_emp{ii} = 1 ;                 
            end;
            output ;
       end;
run;
       
                
/***********************
    Of note: the (ssnunit_id, pik) pair is the unique identifier
             combination, so ssnunit_id can be thought of as the EIN
             for our purposes.
*************************/
%mend readin; 
 %readin; 
